#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Purpose: Validate logic behind anthracite falsification test. Processes and analyses data.

#load packages
library(tidyverse)
library(tidylog)
library(tseries)
library(dynlm)
library(modelsummary)
library(here)
library(lmtest)

#Function to format LaTeX table
source(here("code", "fun", "fix_txt.r"))

#load and merge data
ng <- readRDS(here("data", "input", "eia_naturalgasannual", "eia_nga.rds"))
coal <- readRDS(here("data", "input", "eia_annualcoalreport", "eia_annualcoalreport.rds"))
prod <- full_join(ng, coal, by = "year")
#subset to years where both coal and gas data overlap
prod <- filter(prod, year >= 1949 & year <= 2021)
#create non-ant coal total
prod$nonant <- rowSums(prod[,c("bit","sub","lig")], na.rm= TRUE)
#define time series objects
ant <- ts(scale(prod$ant)[,1], start = 1949, end = 2021, frequency = 1)
gas <- ts(scale(prod$gasprod)[,], start = 1949, end = 2021, frequency = 1)
nonant <- ts(scale(prod$nonant)[,], start = 1949, end = 2021, frequency = 1)
prod <- data.frame(prod)
#estimate models
##anthracite coal
m.ant.base <- dynlm(ant ~ L(gas, 1), start = 1949, end = 2021)
m.ant <- dynlm(ant ~ L(ant, 1:5) + L(gas, 1), start = 1949, end = 2021)
summary(m.ant)
##perform Durbin-Watson test for autocorrelation
dw.ant <- lmtest::dwtest(m.ant)
dw.ant
dw.ant.base <- lmtest::dwtest(m.ant.base)
dw.ant.base
#plot residual correlogram
acf(resid(m.ant.base))
acf(resid(m.ant))
##non-anthracite coal
m.nonant <- dynlm(nonant ~ L(nonant, 1:5) + L(gas, 1), start = 1949, end = 2021)
summary(m.nonant)
m.nonant.base <- dynlm(nonant ~ L(gas, 1), start = 1949, end = 2021)
#perform Durbin-Watson test for autocorrelation
dw.nonant <- lmtest::dwtest(m.nonant)
dw.nonant
dw.nonant.base <- lmtest::dwtest(m.nonant.base)
dw.nonant.base
# Plot residual correlogram
acf(resid(m.nonant))
#Table J3----
#Table formating
gof_map[gof_map$raw=="nobs",]$clean <- "$N$"
gof_map[gof_map$raw=="adj.r.squared",]$clean <- "Adjusted $R^2$"
#Create able output
fileauto <- here("output", "tables", "si_tab_J3_var_coalprod.txt")
modelsummary(
  list(m.ant.base, m.ant, m.nonant.base, m.nonant),
  stars = c("*"=.1,"**"=.05,"***"=.01),
  vcov = "HAC",
  coef_map = c(
    "(Intercept)" = "Intercept",
    "L(gas, 1)" = "Gas$_{t-1}$",
    "L(ant, 1:5)1" = "Anthracite$_{t-1}$",
    "L(ant, 1:5)2" = "Anthracite$_{t-2}$",
    "L(ant, 1:5)3" = "Anthracite$_{t-3}$",
    "L(ant, 1:5)4" = "Anthracite$_{t-4}$",
    "L(ant, 1:5)5" = "Anthracite$_{t-5}$",
    "L(nonant, 1:5)1" = "Non-Anthracite$_{t-1}$",
    "L(nonant, 1:5)2" = "Non-Anthracite$_{t-2}$",
    "L(nonant, 1:5)3" = "Non-Anthracite$_{t-3}$",
    "L(nonant, 1:5)4" = "Non-Anthracite$_{t-4}$",
    "L(nonant, 1:5)5" = "Non-Anthracite$_{t-5}$"
  ),
  gof_omit = "IC|RMSE|R2$|Log|Std",
  gof_map = gof_map,
  add_rows = data.frame(
    c("Durbin-Watson test", "$p$-value"),
    round(c(dw.ant.base$statistic, dw.ant.base$p.value), 3),
    round(c(dw.ant$statistic, dw.ant$p.value), 3),
    round(c(dw.nonant.base$statistic, dw.nonant.base$p.value), 3),
    round(c(dw.nonant$statistic, dw.nonant$p.value), 3)
  ),
  col.names = c("", "(1)", "(2)", "(3)", "(4)"),
  output = "latex",
  escape = FALSE
) %>%
  kableExtra::add_header_above(c(" " = 1, "Anthracite" = 2, "Non-Anthracite" = 2)) %>%
  cat(., file = fileauto)
fix_txt(fileauto)